import pandas as pd
import glob
import soundfile as sf

# Directory that holds the .wav files referenced by the CSV tables below.
wav_root = "/nvmework1/shaonian/Datasets/FSD50K/extracted/"

# Each table provides an "fname" column naming the clips of one subset.
dom_df = pd.read_csv("../../src/extracted/DESED_FSD50K_domestic_sounds.csv")
lm_df = pd.read_csv("../../src/extracted/DESED_FSD50K_label_mapping_non_single_speech.csv")

# Turn every "fname" entry into "<wav_root><fname>.wav".
dom_filenames = [f"{wav_root}{clip!s}.wav" for clip in dom_df["fname"]]
lm_filenames = [f"{wav_root}{clip!s}.wav" for clip in lm_df["fname"]]

# Sum the durations of a list of audio files and return the total in hours.
def get_duration(filenames):
    """Return the combined duration of the given audio files, in hours.

    Each path is queried with ``sf.info`` and its ``duration`` (seconds, per
    the soundfile documentation) is added to a running total. A file that
    cannot be read is reported on stdout and skipped, so it contributes
    nothing to the total.

    Args:
        filenames: Iterable of audio file paths.

    Returns:
        The accumulated duration divided by 3600 (``0.0`` for no input).
    """
    total_seconds = 0
    for wav_path in filenames:
        try:
            total_seconds += sf.info(wav_path).duration
        except Exception as err:
            # Best effort: log the failure and carry on with the next file.
            print(f"Error reading {wav_path}: {err}")
    return total_seconds / 3600

# Totals as returned by get_duration (accumulated duration divided by 3600).
dom_durations = get_duration(dom_filenames)
lm_durations = get_duration(lm_filenames)

# Report both totals once all files have been scanned.
print("Total domestic sounds: {}".format(dom_durations))
print("Total label mapping: {}".format(lm_durations))